# clear the environment ===============
rm(list=ls())

final_trans <- "log"
# what_data <- "breast"
what_data <- "prostate"

1 Notes

  • Data processing

    • data are log transformed
    • different normalisations tried (no normalisation and mean normalisation)
  • data exploration (PCA on common lipids with imputed values)

  • differential expression analysis

    • proDA for DEA (no imputation, takes into account missing values)
  • enrichment analysis (ORA)

1.1 Questions/remarks

  • order of the samples prep and repetition?
  • what are the comparisons of interest?
  • what are the lipid sets to use for enrichment?
  • “housekeeping” lipids for normalisation?

1.2 Current set-up

pander("what data")

what data

what_data
## [1] "prostate"
# pander("final transformation")
# final_trans

2 Data importation and processing

2.1 Data importation

assay_data = read.csv(paste0("data/0192CE_FTMS only_2022-10-19_",
                              what_data,"_assay.csv"),
                      check.names =FALSE, row.names = 1)

col_data = read.csv(paste0("data/0192CE_FTMS only_2022-10-19_",
                              what_data,"_colData.csv"),
                      check.names =FALSE, row.names = 1)

row_data = read.csv(paste0("data/0192CE_FTMS only_2022-10-19_",
                              what_data,"_rowData.csv"),
                    check.names =FALSE, row.names = 1)

identical(rownames(assay_data), rownames(row_data))
## [1] TRUE
identical(colnames(assay_data),rownames(col_data))
## [1] TRUE
se_Species <- SummarizedExperiment(assays = as.matrix(assay_data),
                               rowData = row_data,
                               colData = col_data)

2.2 Lipid sets

# annotation with lipidr: "class", "length", "unsat"
# lipidr::gen_lipidsets(rownames(dat_Species), min_size = 2)

Lipid class

table(rowData(se_Species)$Lipid_Class)
## 
##     CE    Cer     CL     DG     FC HexCer     PC   PC O     PE   PE O     PG 
##     19      7     28     13      1      4     26     17     23     18      8 
##     PI     PS     SM     TG 
##     20     15     10     74

3 Inspect the data characteristics

Design of experiment

df <- colData(se_Species)
table(df$Cell.Line)
## 
## 22RV1 LNCaP   PC3 
##    21    21    21
table(df$Treatment)
## 
##        ALA10      Control       Fer1_3       PunA10 PunA10_fer13      PunA2.5 
##            9            9            9            9            9            9 
##        PunA5 
##            9
table(df$Cell.Line, df$Treatment)
##        
##         ALA10 Control Fer1_3 PunA10 PunA10_fer13 PunA2.5 PunA5
##   22RV1     3       3      3      3            3       3     3
##   LNCaP     3       3      3      3            3       3     3
##   PC3       3       3      3      3            3       3     3
pander(what_data)

prostate

df %>% as.data.frame() %>%
  group_by(Cell.Line, Treatment)%>%
  summarise(n=n())%>%
  spread(Treatment, n)
## `summarise()` has grouped output by 'Cell.Line'. You can override using the
## `.groups` argument.
## # A tibble: 3 × 8
## # Groups:   Cell.Line [3]
##   Cell.Line ALA10 Control Fer1_3 PunA10 PunA10_fer13 PunA2.5 PunA5
##   <chr>     <int>   <int>  <int>  <int>        <int>   <int> <int>
## 1 22RV1         3       3      3      3            3       3     3
## 2 LNCaP         3       3      3      3            3       3     3
## 3 PC3           3       3      3      3            3       3     3

3.1 Filter the data (select prostate or breast)

cd <- colData(se_Species) %>% 
  as.data.frame() %>%
  rownames_to_column() 

samples_selected <- cd$rowname[cd$g_cell_lines==what_data]

# colors for heatmap
Repetition_col <- c("1" = "firebrick", "2" = "darkgreen", "3" = "blue")

Lipid_Class_col <- c("brown", "brown1", "chocolate1", "orange", "gold",
                     "olivedrab1", "olivedrab", "limegreen","mediumseagreen",
                     "cadetblue1",  "darkcyan", "blue","darkblue",
                     "darkviolet", "deeppink") 
set.seed(1)
Lipid_Class_col <- sample(Lipid_Class_col, size = length(Lipid_Class_col))
names(Lipid_Class_col) <- unique(rowData(se_Species)$Lipid_Class)

if(what_data=="prostate"){
  Cell.Line_col = c("22RV1" = "red","LNCaP" = "blue", "PC3" = "green")
  se_Species$Treatment <- factor(se_Species$Treatment, 
                                 levels = c("Control", "ALA10", 
                                            "Fer1_3", "PunA2.5", "PunA5",
                                            "PunA10", "PunA10_fer13"))
  Treatment_col = c("Control" = "red", "ALA10" ="green" ,
                    "Fer1_3" = "goldenrod2",
                    "PunA2.5" = "darkturquoise", "PunA5" = "dodgerblue", 
                    "PunA10" = "dodgerblue4", "PunA10_fer13" = "darkorchid3")
  
  }else{
    Cell.Line_col = c("Hs578T" = "red","MCF7" = "blue")
      se_Species$Treatment <- factor(se_Species$Treatment, 
                                 levels = c("Control", "ALA", "Fer1_10", 
                                            "JA10", "PunA5", "PunA10", 
                                            "PunA20", "PunA20_Fer110" ))
      Treatment_col = c("Control" = "red", "ALA"="green",
                    "Fer1_10" = "goldenrod2", "JA10" = "palevioletred1", 
                    "PunA5" = "darkturquoise", "PunA10"= "dodgerblue", 
                                            "PunA20" = "dodgerblue4", 
                    "PunA20_Fer110" = "darkorchid3")
  }

# filter out only NA lipids
se_Species <- filterNA(se_Species, pNA = 0.99)
dim(se_Species)
## [1] 276  63

4 se_Species

4.1 Deal with missing values

4.2 Describe missing data present in the dataset

barplot_fun <- function(varOfInterest,ncol=1){
  se_Species$nNA <- nNA(se_Species)$nNAcols
  names(se_Species$nNA$pNA) <- colnames(se_Species)
  
  id <- order(colData(se_Species)[,varOfInterest])
  cols <- rainbow(n = length(unique(colData(se_Species)[,varOfInterest])))[as.factor(colData(se_Species)[id,varOfInterest])]
  par(mar=c(7,4,2,2))
  barplot(se_Species$nNA$pNA[id], las=2, ylim = c(0,100), 
          ylab = "% missing values",
                main = "% missing values per sample",
          col = cols)
  legend("top",
         legend=unique(colData(se_Species)[id,varOfInterest]),
         cex = 0.7,
         fill = unique(cols),
         bty="n", horiz = FALSE, xpd=TRUE, 
         inset=c(-0.1,0), ncol = ncol)
  
  par(mar=c(4,4,4,4))
      
}
barplot_fun("Cell.Line")

barplot_fun("Treatment", ncol=3)

X <- assay(se_Species) %>% as.matrix()
X <- is.na(X)
ord <- order(rowSums(X))
X <- matrix(as.numeric(X), ncol = ncol(X))
X2 <- X[ord,]

cd <- colData(se_Species)
rd <-  rowData(se_Species)
rd2 <-  rowData(se_Species)[ord,]

column_ha = HeatmapAnnotation(Cell.Line = cd$Cell.Line, 
                              col = list(Cell.Line = Cell.Line_col))

row_ha = rowAnnotation(Lipid_Class = as.factor(rd$Lipid_Class), 
                       col = list(Lipid_Class = Lipid_Class_col))
row_ha2 = rowAnnotation(Lipid_Class = as.factor(rd2$Lipid_Class), 
                       col = list(Lipid_Class = Lipid_Class_col))
set.seed(2024)
Heatmap(X2, name = "NA values", cluster_rows = FALSE, 
        cluster_columns = FALSE,
        top_annotation = column_ha, 
        right_annotation = row_ha2,
        col = c("black","white"),
    column_title = "Missing values",
    row_names_gp = gpar(fontsize = 5),
    column_names_gp = gpar(fontsize = 6))

X <- assay(se_Species) %>% as.matrix()
X2 <- X[ord,]
set.seed(2024)
Heatmap(X, name = "raw values", cluster_rows = FALSE, 
        cluster_columns = FALSE,
        top_annotation = column_ha, 
        right_annotation = row_ha,
        col = rev(inferno(20)),
    column_title = "raw values", na_col = "white",
    row_names_gp = gpar(fontsize = 5),
    column_names_gp = gpar(fontsize = 6))

set.seed(2024)
Heatmap(log2(X), name = "log values", cluster_rows = FALSE, 
        cluster_columns = FALSE,
        top_annotation = column_ha, 
        right_annotation = row_ha,
        col = viridis(20),
    column_title = "log values", na_col = "white",
    row_names_gp = gpar(fontsize = 5),
    column_names_gp = gpar(fontsize = 6))

Heatmap by cell line

for (i in levels(se_Species$Cell.Line)){
  ids = se_Species$Cell.Line==i
  X <- assay(se_Species)[,ids] %>% as.matrix()
  cd <- colData(se_Species)[ids,] 
  ord = order(cd$Treatment)
  rd <-  rowData(se_Species)
  
  X2 = X[,ord]
  cd2 <- cd[ord,]
  row_ha = rowAnnotation(Lipid_Class = as.factor(rd$Lipid_Class), 
                         col = list(Lipid_Class = Lipid_Class_col))
  
  column_ha = HeatmapAnnotation(Treatment = cd2$Treatment, 
                                Repetition = cd2$Repetition,
                                col = list(Treatment = Treatment_col, 
                                           Repetition = Repetition_col))
  
  ht <- Heatmap(log2(X2), name = "log values", cluster_rows = FALSE, 
          cluster_columns = FALSE,
          left_annotation = row_ha,
          top_annotation = column_ha,
          clustering_method_rows = "ward.D2",
          clustering_method_columns = "ward.D2",
          col = viridis(20),
      column_title = paste0(i, " - no clustering"), 
      na_col = "white",
      row_names_gp = gpar(fontsize = 5),
      column_names_gp = gpar(fontsize = 6))
  print(ht)
  ht <- Heatmap(log2(X2), name = "log values", 
          cluster_rows = FALSE, 
          cluster_columns = TRUE,
          left_annotation = row_ha,
          top_annotation = column_ha,
          clustering_method_rows = "ward.D2",
          clustering_method_columns = "ward.D2",
          col = viridis(20),
      column_title = paste0(i, " - clustering of the samples"), 
      na_col = "white",
      row_names_gp = gpar(fontsize = 5),
      column_names_gp = gpar(fontsize = 6))
  print(ht)
}

4.3 Missing values by lipid class

df <- assay(se_Species) %>% 
  as.data.frame() %>% 
  rownames_to_column() %>%
  left_join(rownames_to_column(
    as.data.frame(rowData(se_Species))[,"Lipid_Class", 
                                           drop=FALSE])) %>%
  pivot_longer(-c(rowname, Lipid_Class))
## Joining with `by = join_by(rowname)`
df$Lipid_Class <- gsub(" ","", df$Lipid_Class)

na_lip_group <- df %>% group_by(Lipid_Class) %>%
  summarise(pc=sum(is.na(value))*100/n(), 
            nmiss = sum(is.na(value)), tot = n())
na_lip_group$prop <- paste0(na_lip_group$nmiss,"/", 
                            na_lip_group$tot)

ggplot(na_lip_group, aes(y=pc, x=Lipid_Class)) + 
    geom_bar(position="dodge", stat="identity")+
  ylab("proportion of missing values (%)") + 
  theme(axis.text.x = element_text(angle = 90, 
                                   vjust = 0.5, hjust=1))

pander(na_lip_group)
Lipid_Class pc nmiss tot prop
CE 13.95 167 1197 167/1197
CL 12.35 210 1701 210/1701
Cer 22.9 101 441 101/441
DG 9.768 80 819 80/819
FC 0 0 63 0/63
HexCer 11.51 29 252 29/252
PC 17.97 283 1575 283/1575
PCO 21.1 226 1071 226/1071
PE 12.22 177 1449 177/1449
PEO 20.28 230 1134 230/1134
PG 13.89 70 504 70/504
PI 24.39 292 1197 292/1197
PS 44.57 365 819 365/819
SM 10.76 61 567 61/567
TG 25.98 1195 4599 1195/4599

4.4 Filter out lipids with too many missing values

res_nNA <- nNA(se_Species)
hist(res_nNA$nNArows$pNA, main = "Frequency of missing values")

pander("removed features")

removed features

df <- res_nNA$nNArows %>% as.data.frame() %>% 
  dplyr::filter(pNA > 0.70)

X <- assay(se_Species[df$name,]) %>% as.matrix()
cd <- colData(se_Species)
ord <- cd %>% as.data.frame() %>% rownames_to_column() %>% 
  arrange(Cell.Line, Treatment) %>% pull(rowname)
X <- X[,ord]
cd <- cd[ord,]
rd <-  rowData(se_Species)[df$name,]

row_ha = rowAnnotation(Lipid_Class = as.factor(rd$Lipid_Class), 
                         col = list(Lipid_Class = Lipid_Class_col))
  
column_ha = HeatmapAnnotation(Treatment = cd$Treatment, 
                              Cell.Line = cd$Cell.Line,
                                col = list(Treatment = Treatment_col,
                                           Cell.Line = Cell.Line_col))

col1 = circlize::colorRamp2(seq(min(log2(assay(se_Species,1)), na.rm=TRUE), 
                      max(log2(assay(se_Species,1)), na.rm=TRUE), length = 20), 
                  viridis(20))


Heatmap(log2(X), name = "log values", cluster_rows = FALSE, 
          cluster_columns = FALSE,
          left_annotation = row_ha,
          top_annotation = column_ha,
          clustering_method_rows = "ward.D2",
          clustering_method_columns = "ward.D2",
          col = col1,
      column_title = "Filtered out lipids", 
      na_col = "white",
      row_names_gp = gpar(fontsize = 5),
      column_names_gp = gpar(fontsize = 6))

# filter out
se_Species <- QFeatures::filterNA(se_Species, pNA = 0.7)
se_Species
## class: SummarizedExperiment 
## dim: 271 63 
## metadata(0):
## assays(1): ''
## rownames(271): PC O-30:1 PC O-32:0 ... CL 76:4 CL 80:11
## rowData names(8): Lipid_Class Sum_Formula ... Number_OHs Method
## colnames(63): 22RV1_70 22RV1_78 ... PC3_58 PC3_65
## colData names(7): Number Cell.Line ... Prot.mg.ml g_cell_lines

5 Data distribution and transformation

5.1 Processing

## -----------------------------------
## log transformation
assay(se_Species,2) <- assay(logTransform(se_Species))
names(assays(se_Species)) <- c("raw","log")
assays(se_Species)$raw %>% as.data.frame()  %>% 
  rownames_to_column() %>% pivot_longer(-rowname) %>%
  group_by(rowname) %>% 
  mutate(mean_val = mean(value, na.rm=TRUE)) %>%
  ggplot(aes(x=mean_val, y = value)) + 
  geom_point(alpha=0.3)+ ggtitle("raw")
## Warning: Removed 3213 rows containing missing values or values outside the scale range
## (`geom_point()`).

assays(se_Species)$log %>% as.data.frame()  %>% 
  rownames_to_column() %>% pivot_longer(-rowname) %>%
  group_by(rowname) %>% 
  mutate(mean_val = mean(value, na.rm=TRUE)) %>%
  ggplot(aes(x=mean_val, y = value)) + 
  geom_point(alpha=0.3) + ggtitle("log")
## Warning: Removed 3213 rows containing missing values or values outside the scale range
## (`geom_point()`).

5.2 Normalize

Normalisation method: center.median

## center.median
# library(MsCoreUtils)

log_vals = assays(se_Species)[["log"]]
log_vals_complete = na.omit(log_vals)
# log_vals_complete = log_vals
cM = matrixStats::colMedians(log_vals_complete, na.rm=TRUE)
assays_se = assays(se_Species)[["log"]]

for (i in 1:ncol(se_Species)){
  assays_se[,i] <- assays(se_Species)[["log"]][,i] - cM[i]
}

boxplot(assays_se)

# replace normalised assay
assays(se_Species)$logNorm <- assays_se

5.3 Save the data

write.xlsx(assays(se_Species)$raw, 
           file = paste0("processed_data/R02_",what_data,".xlsx"),
           sheetName = "assay_raw",append = FALSE)
write.xlsx(assays(se_Species)$log, 
           file = paste0("processed_data/R02_",what_data,".xlsx"),
           sheetName = "assay_log2",append = TRUE)
# write.xlsx(assays(se_Species)$logNorm, 
#            file = paste0("processed_data/R02_",what_data,".xlsx"),
#            sheetName = "assay_logNorm",append = TRUE)
write.xlsx(colData(se_Species), 
           file = paste0("processed_data/R02_",what_data,".xlsx"),
           sheetName = "colData",append = TRUE)
write.xlsx(rowData(se_Species), 
           file = paste0("processed_data/R02_",what_data,".xlsx"),
           sheetName = "rowData",append = TRUE)

# write.csv(assays(se_Species)$raw, 
#           file = paste0("processed_data/R02_",what_data,"_assay_raw.csv"))
# write.csv(assays(se_Species)$log, 
#           file = paste0("processed_data/R02_",what_data,"_assay_log.csv"))
# write.csv(assays(se_Species)$logNorm, 
#           file = paste0("processed_data/R02_",what_data,"_assay_logNorm.csv"))
# write.csv(colData(se_Species), 
#           file = paste0("processed_data/R02_",what_data,"_colData.csv"))
# write.csv(rowData(se_Species), 
#           file = paste0("processed_data/R02_",what_data,"_rowData.csv"))
limma::plotDensities(assay(se_Species,1),legend = FALSE, 
                     main = names(assays(se_Species))[1])

limma::plotDensities(assay(se_Species,2),legend = FALSE, 
                     main = names(assays(se_Species))[2], 
                     col = as.numeric(as.factor(colData(se_Species)$Cell.Line)))

limma::plotDensities(assay(se_Species,3),legend = FALSE, 
                     main = names(assays(se_Species))[3], 
                     col = as.numeric(as.factor(colData(se_Species)$Cell.Line)))

boxplot(assay(se_Species,1), main = names(assays(se_Species))[1])

boxplot(assay(se_Species,2), main = names(assays(se_Species))[2])

boxplot(assay(se_Species,3), main = names(assays(se_Species))[3])

5.4 Final data

We decide here to work with log transformed data that are not normalised.

se <- se_Species 

save(se, file = paste0("processed_data/R02_se_",what_data,".rda"))

6 PCA

6.1 Based on all the lipids

# based on observed lipids across all cell lines
cd = colData(se) %>% as.data.frame() %>% 
  rownames_to_column("name")
rd <- rowData(se) %>% as.data.frame() %>% 
  rownames_to_column("name")

se_filtered = se
rd_filtered <- rd

# log
dat_pca <- t(assays(se_filtered)$log)

if (sum(is.na(dat_pca)) > 0){
  # impute missing values 
  message('imputing missing values for PCA')
  nb = estim_ncpPCA(dat_pca,ncp.max=5)
  print("criterion: ")
  print(nb$criterion)
  print("ncp: ")
  print(nb$ncp)
  res.imp = imputePCA(dat_pca,ncp=nb$ncp, scale=FALSE)
  dat_pca <- res.imp$completeObs
}
## imputing missing values for PCA
## Warning in impute(X, ncp = ncp, scale = scale, method = method, ind.sup =
## ind.sup, : Stopped after 1000 iterations

## Warning in impute(X, ncp = ncp, scale = scale, method = method, ind.sup =
## ind.sup, : Stopped after 1000 iterations

## Warning in impute(X, ncp = ncp, scale = scale, method = method, ind.sup =
## ind.sup, : Stopped after 1000 iterations
## [1] "criterion: "
##         0         1         2         3         4         5 
## 2.4691663 1.1651985 0.7850093 0.6058171 0.3537313 0.3114451 
## [1] "ncp: "
## [1] 5
## Warning in impute(X, ncp = ncp, scale = scale, method = method, ind.sup =
## ind.sup, : Stopped after 1000 iterations
# pareto scaling
for (i in 1:ncol(dat_pca)){
  dat_pca[,i] = dat_pca[,i]/sqrt(sd(dat_pca[,i]))
}

res_pca <- dat_pca %>%
    prcomp(scale = FALSE, center = TRUE) 


# screeplot
res_pca %>% fviz_screeplot()

6.1.1 scores

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Cell.Line, 
                 title = "PCA scores",
                 axes = c(1,2), label="none", addEllipses = TRUE,
                 pointsize = 2.5)

data.frame(res_pca$x, Cell.Line = se_filtered$Cell.Line,
           Treatment = se_filtered$Treatment, 
           Repetition = se_filtered$Repetition) %>%
  ggplot(aes(x = PC1, y = PC2, 
               shape = Cell.Line, color = Repetition))+
  geom_point() + theme_bw()

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(1,2), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Cell.Line, 
                 title = "PCA scores",
                 axes = c(3,4), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(3,4), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(5,6), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(7,8), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

6.1.2 loadings

res_pca %>%
  fviz_pca_var(axes = c(1,2),
               geom = "point")

res_pca %>%
  fviz_pca_var(axes = c(3,4),
               geom = "point")

res_pca %>%
  fviz_pca_var(axes = c(5,6),
               geom = "point")

p <- data.frame(res_pca$rotation, rd_filtered) %>% 
  ggplot(aes(x = PC1, y = PC2, 
             color = Lipid_Class, 
             label = name)) + geom_point() +
  scale_color_manual(values = Lipid_Class_col) 
ggplotly(p)
p <- data.frame(res_pca$rotation, rd_filtered) %>% 
  ggplot(aes(x = PC3, y = PC4, 
             color = Lipid_Class, 
             label = name)) + geom_point() +
  scale_color_manual(values = Lipid_Class_col) 
ggplotly(p)
p <- data.frame(res_pca$rotation, rd_filtered) %>% 
  ggplot(aes(x = PC5, y = PC6, 
             color = Lipid_Class, 
             label = name)) + geom_point() +
  scale_color_manual(values = Lipid_Class_col) 
ggplotly(p)

6.2 Based on observed lipids across all cell lines.

# based on observed lipids across all cell lines
cd = colData(se) %>% as.data.frame() %>% 
  rownames_to_column("name")
rd <- rowData(se) %>% as.data.frame() %>% 
  rownames_to_column("name")

dat = assays(se)$log %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname, names_to = "name") %>%
  left_join(cd, by="name") %>% 
  dplyr::group_by(rowname, Cell.Line) %>%
  summarize(sumnoNA = sum(!is.na(value)))
## `summarise()` has grouped output by 'rowname'. You can override using the
## `.groups` argument.
names_filter = unique(dat$rowname[dat$sumnoNA==0])
table(dat$sumnoNA)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
## 104  10   3   3   1   4   6   2   3   5   3   3   4  13   5   5   6  12  10  11 
##  20  21 
##  22 578
se_filtered = se[!rownames(se) %in% names_filter,]
rd_filtered <- rd[!rownames(se) %in% names_filter,]

# log
dat_pca <- t(assays(se_filtered)$log)

if (sum(is.na(dat_pca)) > 0){
  # impute missing values 
  message('imputing missing values for PCA')
  nb = estim_ncpPCA(dat_pca,ncp.max=5)
  print("criterion: ")
  print(nb$criterion)
  print("ncp: ")
  print(nb$ncp)
  res.imp = imputePCA(dat_pca,ncp=nb$ncp, scale=FALSE)
  dat_pca <- res.imp$completeObs
}
## imputing missing values for PCA
## [1] "criterion: "
##         0         1         2         3         4         5 
## 2.5349352 1.1916307 0.8176203 0.6251655 0.3495380 0.3053218 
## [1] "ncp: "
## [1] 5
# pareto scaling
for (i in 1:ncol(dat_pca)){
  dat_pca[,i] = dat_pca[,i]/sqrt(sd(dat_pca[,i]))
}
res_pca <- dat_pca %>%
    prcomp(scale = FALSE, center = TRUE) 

# screeplot
res_pca %>% fviz_screeplot()

6.2.1 scores

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Cell.Line, 
                 title = "PCA scores",
                 axes = c(1,2), label="none", addEllipses = TRUE,
                 pointsize = 2.5)

data.frame(res_pca$x, Cell.Line = se_filtered$Cell.Line,
           Treatment = se_filtered$Treatment, 
           Repetition = se_filtered$Repetition) %>%
  ggplot(aes(x = PC1, y = PC2, 
               shape = Cell.Line, color = Repetition))+
  geom_point() + theme_bw()

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(1,2), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Cell.Line, 
                 title = "PCA scores",
                 axes = c(3,4), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(3,4), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(5,6), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

res_pca %>%
    fviz_pca_ind(habillage = se_filtered$Treatment, 
                 title = "PCA scores",
                 axes = c(7,8), label="none", addEllipses = FALSE,
                 pointsize = 2.5)

6.2.2 loadings

res_pca %>%
  fviz_pca_var(axes = c(1,2),
               geom = "point")

res_pca %>%
  fviz_pca_var(axes = c(3,4),
               geom = "point")

res_pca %>%
  fviz_pca_var(axes = c(5,6),
               geom = "point")

p <- data.frame(res_pca$rotation, rd_filtered) %>% 
  ggplot(aes(x = PC1, y = PC2, 
             color = Lipid_Class, 
             label = name)) + geom_point() +
  scale_color_manual(values = Lipid_Class_col) 
ggplotly(p)
p <- data.frame(res_pca$rotation, rd_filtered) %>% 
  ggplot(aes(x = PC3, y = PC4, 
             color = Lipid_Class, 
             label = name)) + geom_point() +
  scale_color_manual(values = Lipid_Class_col) 
ggplotly(p)
p <- data.frame(res_pca$rotation, rd_filtered) %>% 
  ggplot(aes(x = PC5, y = PC6, 
             color = Lipid_Class, 
             label = name)) + geom_point() +
  scale_color_manual(values = Lipid_Class_col) 
ggplotly(p)

7 Missing lipids between cell lines

FIXME: to do, heatmap with appropriate scale

se_filtered2 = se[names_filter,]
X <- assays(se_filtered2)$log %>% as.matrix()
# X <- is.na(X)

ord <- order(rowSums(is.na(X[,1:3])))
X <- matrix(as.numeric(X), ncol = ncol(X))
# X <- X[ord,]

cd <- colData(se_filtered2)
rd <-  rowData(se_filtered2)[ord,] 


column_ha = HeatmapAnnotation(Cell.Line = cd$Cell.Line, 
                              Treatment = cd$Treatment,
                              col = list(Cell.Line = Cell.Line_col,
                                         Treatment = Treatment_col))
Lipid_Class_col <- rainbow(nlevels(as.factor(rd$Lipid_Class)))
names(Lipid_Class_col) <- levels(as.factor(rd$Lipid_Class))
row_ha = rowAnnotation(Lipid_Class = as.factor(rd$Lipid_Class), 
                       col = list(Lipid_Class = Lipid_Class_col))

col1 = circlize::colorRamp2(seq(min(assays(se)$log, na.rm=TRUE), 
                      max(assays(se)$log, na.rm=TRUE), length = 20), 
                  viridis(20))

set.seed(2024)
Heatmap(X, name = "log values", cluster_rows = FALSE, 
        cluster_columns = FALSE,
        top_annotation = column_ha, 
        right_annotation = row_ha,
        col = col1,na_col = "white",
    column_title = "lipids missing in one of the cell line",
    row_names_gp = gpar(fontsize = 5),
    column_names_gp = gpar(fontsize = 6))

8 DEA by cell lines

Differential expression analysis by cell lines

se_list <- list()
for (i in unique(se$Cell.Line)){
  se_list[[i]] <- se[,se$Cell.Line == i]
  # remove features completely missing
  id_na <- which(rowSums(is.na(assays(se_list[[i]])$log))==ncol(se_list[[i]]))
  se_list[[i]] <- se_list[[i]][-id_na,]
}
  • regression with proDA, by cell line
  • formula: Treatment
  • treatments are compared to the Control group
for (ii in 1:length(se_list)){ 
  se <- se_list[[ii]]
  cellLineName <- names(se_list)[ii]
  pander(cellLineName)
  
  ## -----------------------------------
  ## Statistical analyses

  form <- ~ se$Treatment 
  cat("formula: ", as.character(form))
  design <- model.matrix(form)
  fitlog <- proDA(assays(se)$log, design = design,
                          data_is_log_transformed = TRUE,
                           verbose = FALSE)
  fitlog$convergence$successful
  fitlognorm <- proDA(assays(se)$logNorm, design = design,
                          data_is_log_transformed = TRUE,
                           verbose = FALSE)
  fitlognorm$convergence$successful
  
  effets <- colnames(design)[colnames(design)!="(Intercept)"]
  effets_sub <- sub("se\\$Treatment", "",effets)
  
  ## log --------------------------------
  res <- list()
  for (j in 1:length(effets)){
    res[[j]] <- test_diff(fitlog, 
                          contrast = paste0("`",effets[j],"`"),
                       alternative = "two.sided",
                       pval_adjust_method = "BH",
                       sort_by = NULL,
                       verbose = F) %>%
      as_tibble() %>% dplyr::rename("lipid" = "name", 
                             "adj.P.Val" = "adj_pval","logFC" = "diff")
    res[[j]]$sign <- 0
    res[[j]]$sign[res[[j]]$logFC>0 &res[[j]]$adj.P.Val<0.05 ] = 1
    res[[j]]$sign[res[[j]]$logFC<0 &res[[j]]$adj.P.Val<0.05 ] = -1
  }
  
  names(res) <- effets_sub
  res_log <- res
  
  logFCs <- do.call(cbind,sapply(res, function(x) x[,"logFC"]))
  adj.P.Vals <- do.call(cbind,sapply(res, function(x) x[,"adj.P.Val"]))
  rownames(logFCs) <- res[[1]]$lipid
  rownames(adj.P.Vals) <- res[[1]]$lipid
  
  VP_log <- list()
  for (k in 1:length(res)){
    df <- res[[k]]
    VP_log[[k]] <- ggplot(df, aes(x = logFC, y = -log10(adj.P.Val),
                  label = lipid)) +
      geom_point() +
      geom_hline(yintercept = -log10(0.05)) +
      theme(legend.position = "none")+ 
      ggtitle(paste0(names(res)[k], " - log - ",cellLineName))
  }
  # VP_log
   
  
  ## lognorm --------------------------------
  res <- list()
  for (j in 1:length(effets)){
    res[[j]] <- test_diff(fitlognorm, 
                          contrast = paste0("`",effets[j],"`"),
                       alternative = "two.sided",
                       pval_adjust_method = "BH",
                       sort_by = NULL,
                       verbose = F) %>%
      as_tibble() %>% dplyr::rename("lipid" = "name", 
                             "adj.P.Val" = "adj_pval","logFC" = "diff")
    res[[j]]$sign <- 0
    res[[j]]$sign[res[[j]]$logFC>0 &res[[j]]$adj.P.Val<0.05 ] = 1
    res[[j]]$sign[res[[j]]$logFC<0 &res[[j]]$adj.P.Val<0.05 ] = -1
  }
  names(res) <- effets_sub
  res_lognorm <- res
  
  logFCs <- do.call(cbind,sapply(res, function(x) x[,"logFC"]))
  adj.P.Vals <- do.call(cbind,sapply(res, function(x) x[,"adj.P.Val"]))
  
  rownames(logFCs) <- res[[1]]$lipid
  rownames(adj.P.Vals) <- res[[1]]$lipid
  
  colnames(logFCs) <- sub("\\.logFC","",colnames(logFCs))
  colnames(adj.P.Vals) <- sub("\\.adj.P.Val","",colnames(adj.P.Vals))
  
  
  DT::datatable(cbind(round(logFCs,3), 
                      format(adj.P.Vals, digits=2)))
  
  VP_logNorm <- list()
  for (k in 1:length(res)){
    df <- res[[k]]
    VP_logNorm[[k]] <- ggplot(df, aes(x = logFC, y = -log10(adj.P.Val),
                  label = lipid)) +
      geom_point() +
      geom_hline(yintercept = -log10(0.05)) +
      theme(legend.position = "none")+ 
      ggtitle(paste0(names(res)[k], " - lognorm - ",cellLineName))
  }
  # VP_logNorm
  
  for (i in 1:length(VP_logNorm)){
    
    print(VP_log[[i]] + VP_logNorm[[i]])
    
    # up
    ll <- list(log = res_log[[i]]$lipid[res_log[[i]]$sign==1], 
    lognorm = res_lognorm[[i]]$lipid[res_lognorm[[i]]$sign==1])
    if (prod(sapply(ll, length))>0){
      plot(Venn(ll))
    }
    
    # down
    ll <- list(log = res_log[[i]]$lipid[res_log[[i]]$sign==-1], 
    lognorm = res_lognorm[[i]]$lipid[res_lognorm[[i]]$sign==-1])
    if (prod(sapply(ll, length))>0){
      plot(Venn(ll))
    }
  }
  
  pander("Compare across conditions (log)")
  
  df <- do.call(cbind,sapply(res_log, function(x) x[,"sign"]))
  
  # upregulated and significant
  pander("upregulated and significant")
  us <- df %>% as.data.frame() %>% dplyr::mutate_all(~ifelse(.x==1,1,0))
  if(sum(us>0)){
    UpSetR::upset(as.data.frame(us), keep.order = TRUE, nsets = ncol(us))%>%
      print()
  }
  
  # downregulated and significant
  pander("downregulated and significant")
  us <- df %>% as.data.frame() %>% dplyr::mutate_all(~ifelse(.x==-1,1,0))
  if(sum(us>0)){
     UpSetR::upset(as.data.frame(us), keep.order = TRUE, nsets = ncol(us))%>%
      print()
  }
  
  pander("logFC comparison")
  upperfun <- function(data,mapping){
    ggplot(data = data, mapping = mapping)+
      geom_point()+
      geom_abline(intercept=0,slope=1, color="gray")
  }
  lowerfun <- function(data,mapping){
    ggplot(data = data, mapping = mapping)+
      geom_point()+
      geom_abline(intercept=0,slope=1, color="gray")
  }
  
  p <- ggpairs(as.data.frame(logFCs),upper = list(continuous = wrap(upperfun)),
          lower = list(continuous = wrap(lowerfun)),
          diag = list(continuous = "blankDiag"))
  p
  pander("interactive plot")
  ggplotly(p)
  
  pander("ORA (log)")
  ### ORA (log)
  
  ## ORA
  term2gene <- rowData(se) %>% as.data.frame() %>%
    rownames_to_column("lipid") %>% 
    select(c("Lipid_Class","lipid"))
  
  for (i in 1:length(VP_log)){
      print("======================")
      print(names(res_log)[i])
      lipid_DE_up = res_log[[i]]$lipid[res_log[[i]]$sign==1]
      lipid_DE_down = res_log[[i]]$lipid[res_log[[i]]$sign==-1]
      
      # up
      print("up")
      if(length(intersect(term2gene$lipid, lipid_DE_up))>0){
        res_enricher <- enricher(gene = lipid_DE_up, # genes_DE
                     universe      = res_log[[i]]$lipid,
                     pAdjustMethod = "BH",
                     pvalueCutoff  = 1, minGSSize = 1,
           TERM2GENE = term2gene, qvalueCutoff = 1)
      
        res_enricher %>%
          as_tibble() %>% print()
      }
      
      # down
      print("down")
      if(length(intersect(term2gene$lipid, lipid_DE_down))>0){
        res_enricher <- enricher(gene = lipid_DE_down, # genes_DE
                     universe      = res_log[[i]]$lipid,
                     pAdjustMethod = "BH",
                     pvalueCutoff  = 1, minGSSize = 1,
           TERM2GENE = term2gene, qvalueCutoff = 1)
      
        res_enricher %>%
          as_tibble() %>% print()
      }
  }
}
## formula:  ~ se$Treatment
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Can only have one: highlight
## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## [1] "======================"
## [1] "ALA10"
## [1] "up"
## # A tibble: 5 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 TG    TG          7/13      58/238  0.0182   0.0908 0.0764 TG 50:3/TG 5…     7
## 2 CE    CE          2/13      18/238  0.257    0.499  0.420  CE 18:3/CE 2…     2
## 3 PC    PC          2/13      20/238  0.300    0.499  0.420  PC 34:3/PC 3…     2
## 4 PI    PI          1/13      16/238  0.605    0.709  0.597  PI 33:0           1
## 5 PE    PE          1/13      21/238  0.709    0.709  0.597  PE 34:3           1
## [1] "down"
## [1] "======================"
## [1] "Fer1_3"
## [1] "up"
## [1] "down"
## [1] "======================"
## [1] "PunA2.5"
## [1] "up"
## # A tibble: 2 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 TG    TG          2/3       58/238   0.148    0.232  0.232 TG 50:4/TG 5…     2
## 2 PC    PC          1/3       20/238   0.232    0.232  0.232 PC 34:3           1
## [1] "down"
## [1] "======================"
## [1] "PunA5"
## [1] "up"
## # A tibble: 3 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 PC    PC          3/9       20/238  0.0309   0.0615 0.0216 PC 34:3/PC 3…     3
## 2 TG    TG          5/9       58/238  0.0410   0.0615 0.0216 TG 50:3/TG 5…     5
## 3 PE    PE          1/9       21/238  0.571    0.571  0.200  PE 34:3           1
## [1] "down"
## [1] "======================"
## [1] "PunA10"
## [1] "up"
## # A tibble: 5 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 TG    TG          7/14      58/238  0.0293    0.147  0.123 TG 50:3/TG 5…     7
## 2 PC    PC          3/14      20/238  0.102     0.254  0.214 PC 34:3/PC 3…     3
## 3 PE    PE          2/14      21/238  0.356     0.593  0.499 PE 34:3/PE 3…     2
## 4 PI    PI          1/14      16/238  0.633     0.678  0.571 PI 33:0           1
## 5 CE    CE          1/14      18/238  0.678     0.678  0.571 CE 18:3           1
## [1] "down"
## [1] "======================"
## [1] "PunA10_fer13"
## [1] "up"
## # A tibble: 5 × 9
##   ID    Description GeneRatio BgRatio  pvalue p.adjust qvalue geneID       Count
##   <chr> <chr>       <chr>     <chr>     <dbl>    <dbl>  <dbl> <chr>        <int>
## 1 TG    TG          9/16      58/238  0.00467   0.0234 0.0197 TG 48:3/TG …     9
## 2 PC    PC          3/16      20/238  0.140     0.350  0.294  PC 34:3/PC …     3
## 3 PE    PE          2/16      21/238  0.423     0.705  0.593  PE 34:3/PE …     2
## 4 PI    PI          1/16      16/238  0.684     0.728  0.613  PI 33:0          1
## 5 CE    CE          1/16      18/238  0.728     0.728  0.613  CE 18:3          1
## [1] "down"
## formula:  ~ se$Treatment

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## [1] "======================"
## [1] "ALA10"
## [1] "up"
## # A tibble: 5 × 9
##   ID    Description GeneRatio BgRatio   pvalue p.adjust  qvalue geneID     Count
##   <chr> <chr>       <chr>     <chr>      <dbl>    <dbl>   <dbl> <chr>      <int>
## 1 TG    TG          12/17     70/241  0.000285  0.00142 0.00120 TG 48:3/T…    12
## 2 PC    PC          2/17      21/241  0.448     0.799   0.673   PC 34:3/P…     2
## 3 DG    DG          1/17      13/241  0.623     0.799   0.673   DG 36:4        1
## 4 CE    CE          1/17      19/241  0.765     0.799   0.673   CE 18:3        1
## 5 PE    PE          1/17      21/241  0.799     0.799   0.673   PE 34:3        1
## [1] "down"
## [1] "======================"
## [1] "Fer1_3"
## [1] "up"
## [1] "down"
## [1] "======================"
## [1] "PunA2.5"
## [1] "up"
## # A tibble: 4 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 PE    PE          2/7       21/241   0.116    0.466  0.466 PE 34:3/PE 3…     2
## 2 TG    TG          3/7       70/241   0.330    0.476  0.476 TG 48:3/TG 5…     3
## 3 CE    CE          1/7       19/241   0.441    0.476  0.476 CE 18:3           1
## 4 PC    PC          1/7       21/241   0.476    0.476  0.476 PC 34:3           1
## [1] "down"
## # A tibble: 1 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID     Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl> <lgl>  <chr>      <int>
## 1 SM    SM          1/1       9/241   0.0373   0.0373 NA     SM 41:1;O2     1
## [1] "======================"
## [1] "PunA5"
## [1] "up"
## # A tibble: 4 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 PC    PC          2/9       21/241   0.179    0.327  0.327 PC 34:3/PC 3…     2
## 2 PE    PE          2/9       21/241   0.179    0.327  0.327 PE 34:3/PE 3…     2
## 3 TG    TG          4/9       70/241   0.246    0.327  0.327 TG 48:3/TG 5…     4
## 4 CE    CE          1/9       19/241   0.529    0.529  0.529 CE 18:3           1
## [1] "down"
## # A tibble: 1 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID  Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl> <lgl>  <chr>   <int>
## 1 DG    DG          1/1       13/241  0.0539   0.0539 NA     DG 34:4     1
## [1] "======================"
## [1] "PunA10"
## [1] "up"
## # A tibble: 6 × 9
##   ID    Description GeneRatio BgRatio  pvalue p.adjust qvalue geneID       Count
##   <chr> <chr>       <chr>     <chr>     <dbl>    <dbl>  <dbl> <chr>        <int>
## 1 TG    TG          13/23     70/241  0.00351   0.0211 0.0185 TG 48:3/TG …    13
## 2 PE    PE          4/23      21/241  0.125     0.375  0.329  PE 34:3/PE …     4
## 3 PC    PC          3/23      21/241  0.323     0.646  0.567  PC 34:3/PC …     3
## 4 DG    DG          1/23      13/241  0.738     0.863  0.757  DG 36:4          1
## 5 PI    PI          1/23      13/241  0.738     0.863  0.757  PI 33:0          1
## 6 CE    CE          1/23      19/241  0.863     0.863  0.757  CE 18:3          1
## [1] "down"
## [1] "======================"
## [1] "PunA10_fer13"
## [1] "up"
## # A tibble: 6 × 9
##   ID    Description GeneRatio BgRatio   pvalue p.adjust   qvalue geneID    Count
##   <chr> <chr>       <chr>     <chr>      <dbl>    <dbl>    <dbl> <chr>     <int>
## 1 TG    TG          17/27     70/241  0.000103 0.000619 0.000543 TG 48:3/…    17
## 2 PE    PE          4/27      21/241  0.195    0.586    0.514    PE 34:3/…     4
## 3 PC    PC          3/27      21/241  0.426    0.853    0.748    PC 34:3/…     3
## 4 DG    DG          1/27      13/241  0.795    0.905    0.794    DG 36:4       1
## 5 PI    PI          1/27      13/241  0.795    0.905    0.794    PI 33:0       1
## 6 CE    CE          1/27      19/241  0.905    0.905    0.794    CE 18:3       1
## [1] "down"
## formula:  ~ se$Treatment

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## Warning: Can only have one: highlight

## [1] "======================"
## [1] "ALA10"
## [1] "up"
## # A tibble: 6 × 9
##   ID    Description GeneRatio BgRatio  pvalue p.adjust qvalue geneID       Count
##   <chr> <chr>       <chr>     <chr>     <dbl>    <dbl>  <dbl> <chr>        <int>
## 1 TG    TG          11/21     53/230  0.00203   0.0122 0.0107 TG 48:3/TG …    11
## 2 CE    CE          3/21      17/230  0.193     0.579  0.508  CE 18:3/CE …     3
## 3 PC    PC          3/21      21/230  0.297     0.594  0.521  PC 34:3/PC …     3
## 4 PE    PE          2/21      22/230  0.622     0.749  0.657  PE 34:4/PE …     2
## 5 DG    DG          1/21      13/230  0.722     0.749  0.657  DG 36:4          1
## 6 PI    PI          1/21      14/230  0.749     0.749  0.657  PI 33:0          1
## [1] "down"
## # A tibble: 1 × 9
##   ID    Description GeneRatio BgRatio  pvalue p.adjust qvalue geneID       Count
##   <chr> <chr>       <chr>     <chr>     <dbl>    <dbl> <lgl>  <chr>        <int>
## 1 TG    TG          4/4       53/230  0.00258  0.00258 NA     TG 48:0/TG …     4
## [1] "======================"
## [1] "Fer1_3"
## [1] "up"
## # A tibble: 1 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID  Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl> <lgl>  <chr>   <int>
## 1 PE    PE          1/1       22/230  0.0957   0.0957 NA     PE 36:6     1
## [1] "down"
## [1] "======================"
## [1] "PunA2.5"
## [1] "up"
## # A tibble: 5 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 PE    PE          3/9       22/230  0.0439    0.219  0.185 PE 34:3/PE 3…     3
## 2 PC    PC          2/9       21/230  0.193     0.484  0.407 PC 34:3/PC 3…     2
## 3 PI    PI          1/9       14/230  0.438     0.632  0.532 PI 33:0           1
## 4 CE    CE          1/9       17/230  0.505     0.632  0.532 CE 18:3           1
## 5 TG    TG          2/9       53/230  0.655     0.655  0.552 TG 50:4/TG 5…     2
## [1] "down"
## [1] "======================"
## [1] "PunA5"
## [1] "up"
## # A tibble: 6 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl>  <dbl> <chr>         <int>
## 1 TG    TG          7/16      53/230  0.0478    0.287  0.251 TG 48:3/TG 5…     7
## 2 PC    PC          3/16      21/230  0.169     0.374  0.328 PC 34:3/PC 3…     3
## 3 PE    PE          3/16      22/230  0.187     0.374  0.328 PE 34:3/PE 3…     3
## 4 DG    DG          1/16      13/230  0.619     0.720  0.631 DG 36:4           1
## 5 PI    PI          1/16      14/230  0.647     0.720  0.631 PI 33:0           1
## 6 CE    CE          1/16      17/230  0.720     0.720  0.631 CE 18:3           1
## [1] "down"
## # A tibble: 1 × 9
##   ID    Description GeneRatio BgRatio pvalue p.adjust qvalue geneID        Count
##   <chr> <chr>       <chr>     <chr>    <dbl>    <dbl> <lgl>  <chr>         <int>
## 1 TG    TG          2/2       53/230  0.0523   0.0523 NA     TG 52:0/TG 6…     2
## [1] "======================"
## [1] "PunA10"
## [1] "up"
## # A tibble: 6 × 9
##   ID    Description GeneRatio BgRatio  pvalue p.adjust qvalue geneID       Count
##   <chr> <chr>       <chr>     <chr>     <dbl>    <dbl>  <dbl> <chr>        <int>
## 1 TG    TG          10/20     53/230  0.00533   0.0320 0.0281 TG 48:3/TG …    10
## 2 PC    PC          3/20      21/230  0.270     0.472  0.414  PC 34:3/PC …     3
## 3 PE    PE          3/20      22/230  0.296     0.472  0.414  PE 34:3/PE …     3
## 4 DG    DG          2/20      13/230  0.315     0.472  0.414  DG 36:3/DG …     2
## 5 PI    PI          1/20      14/230  0.731     0.799  0.701  PI 33:0          1
## 6 CE    CE          1/20      17/230  0.799     0.799  0.701  CE 18:3          1
## [1] "down"
## [1] "======================"
## [1] "PunA10_fer13"
## [1] "up"
## # A tibble: 6 × 9
##   ID    Description GeneRatio BgRatio   pvalue p.adjust  qvalue geneID     Count
##   <chr> <chr>       <chr>     <chr>      <dbl>    <dbl>   <dbl> <chr>      <int>
## 1 TG    TG          12/21     53/230  0.000404  0.00242 0.00212 TG 48:3/T…    12
## 2 PC    PC          3/21      21/230  0.297     0.648   0.569   PC 34:3/P…     3
## 3 PE    PE          3/21      22/230  0.324     0.648   0.569   PE 34:3/P…     3
## 4 DG    DG          1/21      13/230  0.722     0.816   0.715   DG 36:4        1
## 5 PI    PI          1/21      14/230  0.749     0.816   0.715   PI 33:0        1
## 6 CE    CE          1/21      17/230  0.816     0.816   0.715   CE 18:3        1
## [1] "down"
## # A tibble: 13 × 9
##    ID     Description GeneRatio BgRatio    pvalue p.adjust   qvalue geneID Count
##    <chr>  <chr>       <chr>     <chr>       <dbl>    <dbl>    <dbl> <chr>  <int>
##  1 PC O   PC O        12/79     13/230  0.0000140 0.000181 0.000147 PC O-…    12
##  2 SM     SM          8/79      9/230   0.000979  0.00636  0.00515  SM 32…     8
##  3 PG     PG          5/79      7/230   0.0487    0.192    0.155    PG 34…     5
##  4 PC     PC          11/79     21/230  0.0590    0.192    0.155    PC 30…    11
##  5 HexCer HexCer      3/79      4/230   0.118     0.308    0.249    HexCe…     3
##  6 PE O   PE O        7/79      14/230  0.163     0.332    0.269    PE O-…     7
##  7 PE     PE          10/79     22/230  0.179     0.332    0.269    PE 34…    10
##  8 PS     PS          4/79      11/230  0.559     0.908    0.735    PS 34…     4
##  9 PI     PI          3/79      14/230  0.915     1.00     0.810    PI 38…     3
## 10 DG     DG          2/79      13/230  0.971     1.00     0.810    DG 36…     2
## 11 CL     CL          4/79      25/230  0.992     1.00     0.810    CL 68…     4
## 12 CE     CE          2/79      17/230  0.994     1.00     0.810    CE 18…     2
## 13 TG     TG          8/79      53/230  1.00      1.00     0.810    TG 46…     8
cd <- colData(se)%>% 
  as.data.frame() %>%
  rownames_to_column()

pander("up regulated")
# visualise up regulated
lips <- df %>% dplyr::filter(sign.RF == 1) %>% pull(lipid)
assays(se_Species)$log[lips,] %>% as.data.frame() %>%
  rownames_to_column() %>% pivot_longer(-rowname) %>%
  left_join(cd, by = c("name"="rowname")) %>% 
  dplyr::filter(Treatment %in% c("Control", "PunA10"), 
         Cell.Line == ref_cell_line) %>% 
  ggplot(aes(y = value, x = Treatment, color=Repetition)) +
  geom_jitter(height=0) + facet_wrap(~rowname, scale="free_y") + 
  ggtitle("log and upregulated")

pander("down regulated")
# visualise down regulated
lips <- df %>% dplyr::filter(sign.RF == -1) %>% pull(lipid)

assays(se_Species)$log[lips,] %>% as.data.frame() %>%
  rownames_to_column() %>% pivot_longer(-rowname) %>%
  left_join(cd, by = c("name"="rowname")) %>% 
  dplyr::filter(Treatment %in% c("Control", "PunA10"), 
         Cell.Line == ref_cell_line) %>% 
  ggplot(aes(y = value, x = Treatment, color=Repetition)) +
  geom_jitter(height=0) + facet_wrap(~rowname, scale="free_y")+ 
  ggtitle("log and downregulated")

9 sessionInfo

sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Brussels
## tzcode source: internal
## 
## attached base packages:
##  [1] parallel  grid      stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] GGally_2.2.1                Vennerable_3.0             
##  [3] xtable_1.8-6                gtools_3.9.5               
##  [5] reshape_0.8.9               RColorBrewer_1.1-3         
##  [7] RBGL_1.78.0                 graph_1.80.0               
##  [9] clusterProfiler_4.10.0      doParallel_1.0.17          
## [11] iterators_1.0.14            foreach_1.5.2              
## [13] missForest_1.5              lipidr_2.16.0              
## [15] patchwork_1.2.0             limma_3.58.1               
## [17] proDA_1.16.0                UpSetR_1.4.0               
## [19] knitr_1.45                  glmnet_4.1-8               
## [21] Matrix_1.6-5                randomForest_4.7-1.1       
## [23] caret_6.0-94                lattice_0.22-5             
## [25] xlsx_0.6.5                  plotly_4.10.4              
## [27] viridis_0.6.5               viridisLite_0.4.2          
## [29] ComplexHeatmap_2.18.0       FactoMineR_2.10            
## [31] factoextra_1.0.7            readxl_1.4.3               
## [33] gplots_3.1.3.1              missMDA_1.19               
## [35] QFeatures_1.12.0            MultiAssayExperiment_1.28.0
## [37] pander_0.6.5                ggrepel_0.9.5              
## [39] lubridate_1.9.3             forcats_1.0.0              
## [41] stringr_1.5.1               purrr_1.0.2                
## [43] readr_2.1.5                 tidyr_1.3.1                
## [45] tibble_3.2.1                tidyverse_2.0.0            
## [47] SummarizedExperiment_1.32.0 Biobase_2.62.0             
## [49] GenomicRanges_1.54.1        GenomeInfoDb_1.38.6        
## [51] IRanges_2.36.0              S4Vectors_0.40.2           
## [53] BiocGenerics_0.48.1         MatrixGenerics_1.14.0      
## [55] matrixStats_1.2.0           dplyr_1.1.4                
## [57] ggplot2_3.5.0              
## 
## loaded via a namespace (and not attached):
##   [1] nnet_7.3-19             DT_0.32                 Biostrings_2.70.2      
##   [4] TH.data_1.1-2           vctrs_0.6.5             digest_0.6.34          
##   [7] png_0.1-8               shape_1.4.6.1           parallelly_1.37.1      
##  [10] MASS_7.3-60.0.1         reshape2_1.4.4          qvalue_2.34.0          
##  [13] withr_3.0.0             MultiDataSet_1.30.0     ropls_1.34.0           
##  [16] xfun_0.42               ggfun_0.1.4             ellipsis_0.3.2         
##  [19] ggpubr_0.6.0            survival_3.5-8          doRNG_1.8.6            
##  [22] memoise_2.0.1           gmm_1.8                 emmeans_1.10.0         
##  [25] gson_0.1.0              calibrate_1.7.7         tidytree_0.4.6         
##  [28] zoo_1.8-12              GlobalOptions_0.1.2     KEGGREST_1.42.0        
##  [31] scatterplot3d_0.3-44    httr_1.4.7              rstatix_0.7.2          
##  [34] globals_0.16.2          rstudioapi_0.15.0       pan_1.9                
##  [37] generics_0.1.3          DOSE_3.28.2             curl_5.2.1             
##  [40] zlibbioc_1.48.0         ggraph_2.2.0            polyclip_1.10-6        
##  [43] GenomeInfoDbData_1.2.11 SparseArray_1.2.4       evaluate_0.23          
##  [46] S4Arrays_1.2.0          BiocFileCache_2.10.1    hms_1.1.3              
##  [49] colorspace_2.1-0        filelock_1.0.3          magrittr_2.0.3         
##  [52] ggtree_3.10.1           MsCoreUtils_1.14.1      future.apply_1.11.1    
##  [55] shadowtext_0.1.3        cowplot_1.1.3           class_7.3-22           
##  [58] pillar_1.9.0            nlme_3.1-164            caTools_1.18.2         
##  [61] compiler_4.3.0          stringi_1.8.3           gower_1.0.1            
##  [64] jomo_2.7-6              minqa_1.2.6             tmvtnorm_1.6           
##  [67] extraDistr_1.10.0       plyr_1.8.9              crayon_1.5.2           
##  [70] abind_1.4-5             gridGraphics_0.5-1      graphlayouts_1.1.0     
##  [73] bit_4.0.5               sandwich_3.1-0          pcaMethods_1.94.0      
##  [76] fastmatch_1.1-4         codetools_0.2-19        multcomp_1.4-25        
##  [79] recipes_1.0.10          crosstalk_1.2.1         bslib_0.6.1            
##  [82] GetoptLong_1.0.5        splines_4.3.0           circlize_0.4.16        
##  [85] Rcpp_1.0.12             dbplyr_2.4.0            HDO.db_0.99.1          
##  [88] cellranger_1.1.0        leaps_3.1               blob_1.2.4             
##  [91] utf8_1.2.4              clue_0.3-65             AnnotationFilter_1.26.0
##  [94] lme4_1.1-35.1           itertools_0.1-3         fs_1.6.3               
##  [97] listenv_0.9.1           ggsignif_0.6.4          ggplotify_0.1.2        
## [100] estimability_1.5        statmod_1.5.0           tzdb_0.4.0             
## [103] tweenr_2.0.3            pkgconfig_2.0.3         tools_4.3.0            
## [106] cachem_1.0.8            RSQLite_2.3.5           DBI_1.2.2              
## [109] impute_1.76.0           fastmap_1.1.1           rmarkdown_2.25         
## [112] scales_1.3.0            broom_1.0.5             sass_0.4.8             
## [115] coda_0.19-4.1           ggstats_0.5.1           carData_3.0-5          
## [118] rpart_4.1.23            farver_2.1.1            tidygraph_1.3.1        
## [121] scatterpie_0.2.1        yaml_2.3.8              cli_3.6.2              
## [124] lifecycle_1.0.4         mvtnorm_1.2-4           lava_1.7.3             
## [127] backports_1.4.1         BiocParallel_1.36.0     timechange_0.3.0       
## [130] gtable_0.3.4            rjson_0.2.21            pROC_1.18.5            
## [133] ape_5.7-1               jsonlite_1.8.8          mitml_0.4-5            
## [136] bitops_1.0-7            multcompView_0.1-9      bit64_4.0.5            
## [139] yulab.utils_0.1.4       mice_3.16.0             highr_0.10             
## [142] jquerylib_0.1.4         GOSemSim_2.28.1         imputeLCMD_2.1         
## [145] timeDate_4032.109       lazyeval_0.2.2          htmltools_0.5.7        
## [148] rJava_1.0-11            enrichplot_1.22.0       GO.db_3.18.0           
## [151] glue_1.7.0              XVector_0.42.0          RCurl_1.98-1.14        
## [154] treeio_1.26.0           gridExtra_2.3           boot_1.3-30            
## [157] flashClust_1.01-2       igraph_2.0.2            R6_2.5.1               
## [160] labeling_0.4.3          xlsxjars_0.6.1          cluster_2.1.6          
## [163] rngtools_1.5.2          aplot_0.2.2             ipred_0.9-14           
## [166] nloptr_2.0.3            DelayedArray_0.28.0     tidyselect_1.2.0       
## [169] ProtGenerics_1.34.0     ggforce_0.4.2           qqman_0.1.9            
## [172] car_3.1-2               AnnotationDbi_1.64.1    future_1.33.1          
## [175] ModelMetrics_1.2.2.2    munsell_0.5.0           KernSmooth_2.23-22     
## [178] data.table_1.15.2       htmlwidgets_1.6.4       fgsea_1.28.0           
## [181] rlang_1.1.3             Cairo_1.6-2             norm_1.0-11.1          
## [184] fansi_1.0.6             hardhat_1.3.1           prodlim_2023.08.28